0416235 劉昱劭
import warnings; warnings.simplefilter('ignore')
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Load the protein-expression data and impute missing values per class.
# ---------------------------------------------------------------------------
protein = pd.read_csv("nuclear.csv", index_col=0)
#protein.groupby(['class']).describe(percentiles=[]).transpose().head()

# Column slices over the 77 feature columns.
# NOTE(review): the first slice is 0:18, overlapping the second (11:21);
# the pattern of the remaining slices suggests 0:11 was intended — confirm.
proteinData = [protein.iloc[:, 0:18], protein.iloc[:, 11:21], protein.iloc[:, 21:31],
               protein.iloc[:, 31:41], protein.iloc[:, 41:51], protein.iloc[:, 51:61],
               protein.iloc[:, 61:71], protein.iloc[:, 71:77]]
#print(proteinData)

# Fill Missing Value with avg value of group
na_cols = protein.columns[protein.isna().any()].tolist()
print(na_cols)

fill_protein = protein.copy()
for n in na_cols:
    # Use transform('mean') instead of groupby(...).apply(fillna): transform
    # keeps the original row index, whereas apply can return a group-keyed
    # index on recent pandas versions and silently misalign the assignment.
    fill_protein[n] = fill_protein[n].fillna(
        fill_protein.groupby('class', sort=False)[n].transform('mean'))

# Sanity check: no column should contain NaN any more.
print(fill_protein.columns[fill_protein.isna().any()].tolist())

# Encode the class label as integer codes; used as clustering ground truth.
target_encoded_protein = fill_protein.copy()
target_encoded_protein['target'] = target_encoded_protein['class'].astype('category').cat.codes
# Bare expression: shows one sample row per class in a notebook cell.
target_encoded_protein.groupby(['class']).head(1).iloc[:, -10:]
ground_truth = target_encoded_protein.iloc[:, -1].values
# ---------------------------------------------------------------------------
# Project the 77-dimensional feature matrix to 2-D with PCA (unsupervised)
# and LDA (supervised) and compare the two embeddings side by side,
# coloured by the true class.
# ---------------------------------------------------------------------------
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.colors as mcolors

X = target_encoded_protein.iloc[:, :77].values
y = ground_truth.copy()
target_names = target_encoded_protein['class'].unique()

# Keep the two directions of largest variance.
pca = PCA(n_components=2)
X_PCA = pca.fit(X).transform(X)

# Maximise between-class separation instead.
lda = LinearDiscriminantAnalysis(n_components=2)
X_LDA = lda.fit(X, y).transform(X)

# Percentage of variance explained for each components
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))

plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
colors = flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71", 'm', 'darkorange']
#colors = list(mcolors.cnames)
lw = 2

# Left panel: PCA embedding, one scatter call per class.
for class_idx, (hue, label_text) in enumerate(zip(colors, target_names)):
    selected = y == class_idx
    plt.scatter(X_PCA[selected, 0], X_PCA[selected, 1], color=hue, alpha=.8,
                lw=lw, label=label_text)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA')

# Right panel: LDA embedding with the same colour coding.
plt.subplot(1, 2, 2)
for class_idx, (hue, label_text) in enumerate(zip(colors, target_names)):
    selected = y == class_idx
    plt.scatter(X_LDA[selected, 0], X_LDA[selected, 1], alpha=.8, color=hue,
                label=label_text)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('LDA')
plt.show()
# ---------------------------------------------------------------------------
# Round 1: clustering algorithms that choose their own number of clusters
# (AffinityPropagation, MeanShift, DBSCAN), run on the original features and
# on the 2-D PCA / LDA embeddings.  Scatter plots are drawn only for 2-D
# data; external-validation scores are reported for every dataset.
# ---------------------------------------------------------------------------
from sklearn.metrics.cluster import v_measure_score, adjusted_rand_score, completeness_score, homogeneity_score
rstCol = ['v_measure', 'completeness', 'homogeneity', 'rand_score']
clusterResult = pd.DataFrame(columns=rstCol)

import time
import warnings
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster, mixture
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice

# ============
# Set up cluster parameters
# ============
plt.figure(figsize=(9 * 2 + 3, 11))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)
plot_num = 1
default_base = {'quantile': .3,
                'eps': .3,
                'damping': .9,
                'preference': -200,
                'n_neighbors': 10,
                'n_clusters': 8}
dataName = ['Origin', 'PCA', 'LDA']
datasets = [([fill_protein.iloc[:, :77].values, ground_truth], {}),
            ([X_PCA, ground_truth], {}),
            ([X_LDA, ground_truth], {})
            ]
rstCol = ['v_measure', 'completeness', 'homogeneity', 'rand_score']
clusterResult = pd.DataFrame(columns=rstCol)

for i_dataset, (dataset, algo_params) in enumerate(datasets):
    # update parameters with dataset-specific values
    params = default_base.copy()
    params.update(algo_params)
    X, y = dataset
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)
    # estimate bandwidth for mean shift
    bandwidth = cluster.estimate_bandwidth(X, quantile=params['quantile'])
    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(
        X, n_neighbors=params['n_neighbors'], include_self=False)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)

    # ============
    # Create cluster objects
    # ============
    ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
    dbscan = cluster.DBSCAN(eps=params['eps'])
    affinity_propagation = cluster.AffinityPropagation(
        damping=params['damping'], preference=params['preference'])
    clustering_algorithms = (
        ('AffinityPropagation', affinity_propagation),
        ('MeanShift', ms),
        ('DBSCAN', dbscan)
    )

    # Only plot scatterplot (ground-truth column) for 2-dim data.
    dimension = X.shape[1]
    if dimension < 3:
        colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                             '#f781bf', '#a65628', '#984ea3',
                                             '#999999', '#e41a1c', '#dede00']),
                                      int(max(ground_truth) + 1))))
        plt.subplot(len(datasets) + 1, len(clustering_algorithms) + 1, plot_num)
        # NOTE(review): titles are drawn when i_dataset == 1 (PCA row);
        # the 77-dim 'Origin' row is never plotted, so PCA is the first
        # visible row — confirm this is the intent.
        if i_dataset == 1:
            plt.title('Ground Truth', size=18)
        plt.ylabel(dataName[i_dataset], fontsize=18)
        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[ground_truth])
        xLimit = X[:, 0].min() - 0.4, X[:, 0].max() + 0.4
        yLimit = X[:, 1].min() - 0.4, X[:, 1].max() + 0.4
        plt.xlim(xLimit)
        plt.ylim(yLimit)
        plt.xticks(())
        plt.yticks(())
        plot_num += 1

    for i_algo, (name, algorithm) in enumerate(clustering_algorithms):
        t0 = time.time()
        # catch warnings related to kneighbors_graph
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="the number of connected components of the " +
                        "connectivity matrix is [0-9]{1,2}" +
                        " > 1. Completing it to avoid stopping the tree early.",
                category=UserWarning)
            warnings.filterwarnings(
                "ignore",
                message="Graph is not fully connected, spectral embedding" +
                        " may not work as expected.",
                category=UserWarning)
            algorithm.fit(X)
        t1 = time.time()
        if hasattr(algorithm, 'labels_'):
            # np.int was removed in NumPy 1.24; the builtin int is the
            # documented replacement and is behaviourally identical here.
            y_pred = algorithm.labels_.astype(int)
        else:
            y_pred = algorithm.predict(X)
        dimension = X.shape[1]
        if dimension <= 3:
            plt.subplot(len(datasets) + 1, len(clustering_algorithms) + 1, plot_num)
            if i_dataset == 1:
                plt.title(name, size=18)
            colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                                 '#f781bf', '#a65628', '#984ea3',
                                                 '#999999', '#e41a1c', '#dede00']),
                                          int(max(y_pred) + 1))))
            # add black color for outliers (if any)
            colors = np.append(colors, ["#000000"])
            plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])
            xLimit = X[:, 0].min() - 0.4, X[:, 0].max() + 0.4
            yLimit = X[:, 1].min() - 0.4, X[:, 1].max() + 0.4
            plt.xlim(xLimit)
            plt.ylim(yLimit)
            plt.xticks(())
            plt.yticks(())
            plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                     transform=plt.gca().transAxes, size=15,
                     horizontalalignment='right')
            plot_num += 1
        # External-validation scores against the true labels.
        clusterResult.loc[name] = v_measure_score(ground_truth, y_pred), \
            completeness_score(ground_truth, y_pred), homogeneity_score(ground_truth, y_pred), \
            adjusted_rand_score(ground_truth, y_pred)

    print(dataName[i_dataset], '(%d dim)' % dimension)
    display(clusterResult.transpose())  # display() is an IPython/Jupyter builtin
    print('-' * 50)

# `dimension` here is from the last dataset (LDA, 2-D), so the figure shows.
if dimension <= 3:
    plt.show()
因為資料過於靠近,難以分出 8 群 (Because the data points lie too close together, it is hard to separate them into 8 clusters.)
# ---------------------------------------------------------------------------
# Round 2: clustering algorithms with a fixed number of clusters (k = 8):
# MiniBatchKMeans, SpectralClustering, Ward, average-linkage agglomerative,
# Birch and GaussianMixture, run on the same three datasets as round 1.
# ---------------------------------------------------------------------------
import time
import warnings
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster, mixture
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice

# ============
# Set up cluster parameters
# ============
plt.figure(figsize=(9 * 2 + 3, 11))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05,
                    hspace=.01)
plot_num = 1
default_base = {'quantile': .3,
                'eps': .3,
                'damping': .9,
                'preference': -200,
                'n_neighbors': 10,
                'n_clusters': 8}
dataName = ['Origin', 'PCA', 'LDA']
datasets = [([fill_protein.iloc[:, :77].values, ground_truth], {}),
            ([X_PCA, ground_truth], {}),
            ([X_LDA, ground_truth], {})
            ]
rstCol = ['v_measure', 'completeness', 'homogeneity', 'rand_score']
clusterResult = pd.DataFrame(columns=rstCol)

for i_dataset, (dataset, algo_params) in enumerate(datasets):
    # update parameters with dataset-specific values
    params = default_base.copy()
    params.update(algo_params)
    X, y = dataset
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)
    # estimate bandwidth for mean shift
    bandwidth = cluster.estimate_bandwidth(X, quantile=params['quantile'])
    # connectivity matrix for structured Ward
    connectivity = kneighbors_graph(
        X, n_neighbors=params['n_neighbors'], include_self=False)
    # make connectivity symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)

    # ============
    # Create cluster objects
    # ============
    # NOTE(review): the entry is labelled 'KMeans' but is MiniBatchKMeans.
    two_means = cluster.MiniBatchKMeans(n_clusters=params['n_clusters'])
    ward = cluster.AgglomerativeClustering(
        n_clusters=params['n_clusters'], linkage='ward',
        connectivity=connectivity)
    spectral = cluster.SpectralClustering(
        n_clusters=params['n_clusters'], eigen_solver='arpack',
        affinity="nearest_neighbors")
    # NOTE(review): `affinity=` on AgglomerativeClustering is renamed to
    # `metric=` in scikit-learn >= 1.2 — update when upgrading sklearn.
    average_linkage = cluster.AgglomerativeClustering(
        linkage="average", affinity="cityblock",
        n_clusters=params['n_clusters'], connectivity=connectivity)
    birch = cluster.Birch(n_clusters=params['n_clusters'])
    gmm = mixture.GaussianMixture(
        n_components=params['n_clusters'], covariance_type='full')
    clustering_algorithms = (
        ('KMeans', two_means),
        ('SpectralClustering', spectral),
        ('Ward', ward),
        ('AgglomerativeClustering', average_linkage),
        ('Birch', birch),
        ('GaussianMixture', gmm)
    )

    # Only plot scatterplot (ground-truth column) for 2-dim data.
    dimension = X.shape[1]
    if dimension < 3:
        colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                             '#f781bf', '#a65628', '#984ea3',
                                             '#999999', '#e41a1c', '#dede00']),
                                      int(max(ground_truth) + 1))))
        plt.subplot(len(datasets) + 1, len(clustering_algorithms) + 1, plot_num)
        if i_dataset == 1:
            plt.title('Ground Truth', size=18)
        plt.ylabel(dataName[i_dataset], fontsize=18)
        plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[ground_truth])
        xLimit = X[:, 0].min() - 0.4, X[:, 0].max() + 0.4
        yLimit = X[:, 1].min() - 0.4, X[:, 1].max() + 0.4
        plt.xlim(xLimit)
        plt.ylim(yLimit)
        plt.xticks(())
        plt.yticks(())
        plot_num += 1

    for i_algo, (name, algorithm) in enumerate(clustering_algorithms):
        t0 = time.time()
        # catch warnings related to kneighbors_graph
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message="the number of connected components of the " +
                        "connectivity matrix is [0-9]{1,2}" +
                        " > 1. Completing it to avoid stopping the tree early.",
                category=UserWarning)
            warnings.filterwarnings(
                "ignore",
                message="Graph is not fully connected, spectral embedding" +
                        " may not work as expected.",
                category=UserWarning)
            algorithm.fit(X)
        t1 = time.time()
        if hasattr(algorithm, 'labels_'):
            # np.int was removed in NumPy 1.24; the builtin int is the
            # documented replacement and is behaviourally identical here.
            y_pred = algorithm.labels_.astype(int)
        else:
            # GaussianMixture has no labels_ attribute; predict instead.
            y_pred = algorithm.predict(X)
        dimension = X.shape[1]
        if dimension <= 3:
            plt.subplot(len(datasets) + 1, len(clustering_algorithms) + 1, plot_num)
            if i_dataset == 1:
                plt.title(name, size=18)
            colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                                 '#f781bf', '#a65628', '#984ea3',
                                                 '#999999', '#e41a1c', '#dede00']),
                                          int(max(y_pred) + 1))))
            # add black color for outliers (if any)
            colors = np.append(colors, ["#000000"])
            plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])
            xLimit = X[:, 0].min() - 0.4, X[:, 0].max() + 0.4
            yLimit = X[:, 1].min() - 0.4, X[:, 1].max() + 0.4
            plt.xlim(xLimit)
            plt.ylim(yLimit)
            plt.xticks(())
            plt.yticks(())
            plt.text(.99, .01, ('%.2fs' % (t1 - t0)).lstrip('0'),
                     transform=plt.gca().transAxes, size=15,
                     horizontalalignment='right')
            plot_num += 1
        # External-validation scores against the true labels.
        clusterResult.loc[name] = v_measure_score(ground_truth, y_pred), \
            completeness_score(ground_truth, y_pred), homogeneity_score(ground_truth, y_pred), \
            adjusted_rand_score(ground_truth, y_pred)

    print(dataName[i_dataset], '(%d dim)' % dimension)
    display(clusterResult.transpose())  # display() is an IPython/Jupyter builtin
    print('-' * 50)

# `dimension` here is from the last dataset (LDA, 2-D), so the figure shows.
if dimension <= 3:
    plt.show()
# ---------------------------------------------------------------------------
# 3-D embeddings: compute 3-component PCA / LDA projections, then run the
# "centerless" algorithms (AffinityPropagation, MeanShift, DBSCAN) on the
# 3-D PCA embedding and visualise each result plus the ground truth in 3-D.
# ---------------------------------------------------------------------------
X = target_encoded_protein.iloc[:, :77].values
y = ground_truth.copy()
target_names = fill_protein['class'].unique()

pca3 = PCA(n_components=3)
X_PCA3 = pca3.fit(X).transform(X)
lda3 = LinearDiscriminantAnalysis(n_components=3)
X_LDA3 = lda3.fit(X, y).transform(X)

from sklearn.metrics.cluster import v_measure_score, adjusted_rand_score, completeness_score, homogeneity_score
rstCol = ['v_measure', 'completeness', 'homogeneity', 'rand_score']
clusterResult = pd.DataFrame(columns=rstCol)

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn import cluster

# ============
# Create cluster objects
# ============
# NOTE(review): `bandwidth` and `params` are leftovers from the last (LDA,
# 2-D) iteration of the earlier loop, so MeanShift's bandwidth was estimated
# on different data than it is fitted on here — confirm this is intended.
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
dbscan = cluster.DBSCAN(eps=params['eps'])
affinity_propagation = cluster.AffinityPropagation(
    damping=params['damping'], preference=params['preference'])
estimators = [('AffinityPropagation', affinity_propagation),
              ('MeanShift', ms),
              ('DBSCAN', dbscan)
              ]
fignum = 1
titles = ['AffinityPropagation', 'MeanShift', 'DBSCAN']
X = X_PCA3

for name, est in estimators:
    fig = plt.figure(fignum, figsize=(12, 6))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    est.fit(X)
    labels = est.labels_
    cmhot = plt.get_cmap("Set1")
    ax.scatter(X[:, 1], X[:, 0], X[:, 2],
               c=labels, edgecolor='k', label=target_names, cmap=cmhot)
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    ax.set_title(titles[fignum - 1])
    ax.dist = 12
    fignum = fignum + 1
    # External-validation scores against the true labels.
    clusterResult.loc[name] = v_measure_score(ground_truth, labels), \
        completeness_score(ground_truth, labels), homogeneity_score(ground_truth, labels), \
        adjusted_rand_score(ground_truth, labels)

# Plot the ground truth.
# Fix: removed the dead `ax = fig.gca(projection='3d')` call — its result was
# immediately overwritten, and gca() no longer accepts a projection argument
# on matplotlib >= 3.6.
fig = plt.figure(fignum, figsize=(12, 6))
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
# Reorder the labels to have colors matching the cluster results
#y = np.choose(y, range(0, len(np.unique(y)))).astype(np.float)
cmhot = plt.get_cmap("Set1")
ax.scatter(X[:, 1], X[:, 0], X[:, 2], c=ground_truth, edgecolor='k', label=y, cmap=cmhot)
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_title('Ground Truth')
ax.dist = 12

# Display Result
dimension = X.shape[1]  # fix: the previous value was stale (from the 2-D loop)
print("PCA", '(%d dim)' % dimension)
display(clusterResult.transpose())  # display() is an IPython/Jupyter builtin
print('-' * 50)
fig.show()
# ---------------------------------------------------------------------------
# Fixed-k algorithms (KMeans, SpectralClustering, Ward, average-linkage,
# Birch) on the 3-D PCA embedding, with 3-D plots and scores.
# ---------------------------------------------------------------------------
from sklearn.metrics.cluster import v_measure_score, adjusted_rand_score, completeness_score, homogeneity_score
rstCol = ['v_measure', 'completeness', 'homogeneity', 'rand_score']
clusterResult = pd.DataFrame(columns=rstCol)

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn import cluster

# ============
# Create cluster objects
# ============
# NOTE(review): `params` and `connectivity` are leftovers from the last
# (LDA, 2-D) iteration of the earlier loop; the connectivity graph was built
# on different coordinates than the 3-D data fitted here — confirm intended.
ward = cluster.AgglomerativeClustering(
    n_clusters=params['n_clusters'], linkage='ward',
    connectivity=connectivity)
spectral = cluster.SpectralClustering(
    n_clusters=params['n_clusters'], eigen_solver='arpack',
    affinity="nearest_neighbors")
average_linkage = cluster.AgglomerativeClustering(
    linkage="average", affinity="cityblock",
    n_clusters=params['n_clusters'], connectivity=connectivity)
birch = cluster.Birch(n_clusters=params['n_clusters'])
estimators = [('KMeans', KMeans(n_clusters=8)),
              ('SpectralClustering', spectral),
              ('Ward', ward),
              ('AgglomerativeClustering', average_linkage),
              ('Birch', cluster.Birch(n_clusters=8))
              ]
fignum = 1
titles = ['KMeans', 'SpectralClustering', 'Ward', 'AgglomerativeClustering', 'Birch']
X = X_PCA3

for name, est in estimators:
    fig = plt.figure(fignum, figsize=(12, 6))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    est.fit(X)
    labels = est.labels_
    cmhot = plt.get_cmap("Set1")
    ax.scatter(X[:, 1], X[:, 0], X[:, 2],
               c=labels, edgecolor='k', label=target_names, cmap=cmhot)
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    ax.set_title(titles[fignum - 1])
    ax.dist = 12
    fignum = fignum + 1
    # External-validation scores against the true labels.
    clusterResult.loc[name] = v_measure_score(ground_truth, labels), \
        completeness_score(ground_truth, labels), homogeneity_score(ground_truth, labels), \
        adjusted_rand_score(ground_truth, labels)

# Plot the ground truth.
# Fix: removed the dead `ax = fig.gca(projection='3d')` call — its result was
# immediately overwritten, and gca() no longer accepts a projection argument
# on matplotlib >= 3.6.
fig = plt.figure(fignum, figsize=(12, 6))
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
# Reorder the labels to have colors matching the cluster results
#y = np.choose(y, range(0, len(np.unique(y)))).astype(np.float)
cmhot = plt.get_cmap("Set1")
ax.scatter(X[:, 1], X[:, 0], X[:, 2], c=ground_truth, edgecolor='k', label=y, cmap=cmhot)
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_title('Ground Truth')
ax.dist = 12

# Display Result
dimension = X.shape[1]  # fix: the previous value was stale (from the 2-D loop)
print("PCA", '(%d dim)' % dimension)
display(clusterResult.transpose())  # display() is an IPython/Jupyter builtin
print('-' * 50)
fig.show()
# ---------------------------------------------------------------------------
# Centerless algorithms (AffinityPropagation, MeanShift, DBSCAN) on the 3-D
# LDA embedding, with 3-D plots and scores.
# ---------------------------------------------------------------------------
from sklearn.metrics.cluster import v_measure_score, adjusted_rand_score, completeness_score, homogeneity_score
rstCol = ['v_measure', 'completeness', 'homogeneity', 'rand_score']
clusterResult = pd.DataFrame(columns=rstCol)

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn import cluster

# ============
# Create cluster objects
# ============
# NOTE(review): `bandwidth` and `params` are leftovers from the earlier 2-D
# loop, so MeanShift's bandwidth was estimated on different data — confirm.
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
dbscan = cluster.DBSCAN(eps=params['eps'])
affinity_propagation = cluster.AffinityPropagation(
    damping=params['damping'], preference=params['preference'])
estimators = [('AffinityPropagation', affinity_propagation),
              ('MeanShift', ms),
              ('DBSCAN', dbscan)
              ]
fignum = 1
titles = ['AffinityPropagation', 'MeanShift', 'DBSCAN']
X = X_LDA3

for name, est in estimators:
    fig = plt.figure(fignum, figsize=(12, 6))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    est.fit(X)
    labels = est.labels_
    cmhot = plt.get_cmap("Set1")
    ax.scatter(X[:, 1], X[:, 0], X[:, 2],
               c=labels, edgecolor='k', label=target_names, cmap=cmhot)
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    ax.set_title(titles[fignum - 1])
    ax.dist = 12
    fignum = fignum + 1
    # External-validation scores against the true labels.
    clusterResult.loc[name] = v_measure_score(ground_truth, labels), \
        completeness_score(ground_truth, labels), homogeneity_score(ground_truth, labels), \
        adjusted_rand_score(ground_truth, labels)

# Plot the ground truth.
# Fix: removed the dead `ax = fig.gca(projection='3d')` call — its result was
# immediately overwritten, and gca() no longer accepts a projection argument
# on matplotlib >= 3.6.
fig = plt.figure(fignum, figsize=(12, 6))
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
# Reorder the labels to have colors matching the cluster results
#y = np.choose(y, range(0, len(np.unique(y)))).astype(np.float)
cmhot = plt.get_cmap("Set1")
ax.scatter(X[:, 1], X[:, 0], X[:, 2], c=ground_truth, edgecolor='k', label=y, cmap=cmhot)
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_title('Ground Truth')
ax.dist = 12

# Display Result
dimension = X.shape[1]  # fix: the previous value was stale (from the 2-D loop)
print("LDA", '(%d dim)' % dimension)
display(clusterResult.transpose())  # display() is an IPython/Jupyter builtin
print('-' * 50)
fig.show()
值得注意的是,降成三維後 (Notably, after reducing to three dimensions):
- AffinityPropagation 和 MeanShift 都明顯比二維 LDA 變好。(Both AffinityPropagation and MeanShift improve clearly over 2-D LDA.)
- AffinityPropagation 可分出 8 群,但 MeanShift 只分出 7 群。(AffinityPropagation separates 8 clusters, but MeanShift finds only 7.)
- DBSCAN 還是一樣慘。(DBSCAN still performs just as poorly.)
# ---------------------------------------------------------------------------
# Fixed-k algorithms (KMeans, SpectralClustering, Ward, average-linkage,
# Birch) on the 3-D LDA embedding, with 3-D plots and scores.
# ---------------------------------------------------------------------------
from sklearn.metrics.cluster import v_measure_score, adjusted_rand_score, completeness_score, homogeneity_score
rstCol = ['v_measure', 'completeness', 'homogeneity', 'rand_score']
clusterResult = pd.DataFrame(columns=rstCol)

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn import cluster

# ============
# Create cluster objects
# ============
# NOTE(review): `params` and `connectivity` are leftovers from the earlier
# 2-D loop; the connectivity graph was built on different coordinates than
# the 3-D data fitted here — confirm intended.
ward = cluster.AgglomerativeClustering(
    n_clusters=params['n_clusters'], linkage='ward',
    connectivity=connectivity)
spectral = cluster.SpectralClustering(
    n_clusters=params['n_clusters'], eigen_solver='arpack',
    affinity="nearest_neighbors")
average_linkage = cluster.AgglomerativeClustering(
    linkage="average", affinity="cityblock",
    n_clusters=params['n_clusters'], connectivity=connectivity)
birch = cluster.Birch(n_clusters=params['n_clusters'])
estimators = [('KMeans', KMeans(n_clusters=8)),
              ('SpectralClustering', spectral),
              ('Ward', ward),
              ('AgglomerativeClustering', average_linkage),
              ('Birch', cluster.Birch(n_clusters=8))
              ]
fignum = 1
titles = ['KMeans', 'SpectralClustering', 'Ward', 'AgglomerativeClustering', 'Birch']
X = X_LDA3

for name, est in estimators:
    fig = plt.figure(fignum, figsize=(12, 6))
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
    est.fit(X)
    labels = est.labels_
    cmhot = plt.get_cmap("Set1")
    ax.scatter(X[:, 1], X[:, 0], X[:, 2],
               c=labels, edgecolor='k', label=target_names, cmap=cmhot)
    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])
    ax.set_title(titles[fignum - 1])
    ax.dist = 12
    fignum = fignum + 1
    # External-validation scores against the true labels.
    clusterResult.loc[name] = v_measure_score(ground_truth, labels), \
        completeness_score(ground_truth, labels), homogeneity_score(ground_truth, labels), \
        adjusted_rand_score(ground_truth, labels)

# Plot the ground truth.
# Fix: removed the dead `ax = fig.gca(projection='3d')` call — its result was
# immediately overwritten, and gca() no longer accepts a projection argument
# on matplotlib >= 3.6.
fig = plt.figure(fignum, figsize=(12, 6))
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
# Reorder the labels to have colors matching the cluster results
#y = np.choose(y, range(0, len(np.unique(y)))).astype(np.float)
cmhot = plt.get_cmap("Set1")
ax.scatter(X[:, 1], X[:, 0], X[:, 2], c=ground_truth, edgecolor='k', label=y, cmap=cmhot)
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_title('Ground Truth')
ax.dist = 12

# Display Result
dimension = X.shape[1]  # fix: the previous value was stale (from the 2-D loop)
print("LDA", '(%d dim)' % dimension)
display(clusterResult.transpose())  # display() is an IPython/Jupyter builtin
print('-' * 50)
fig.show()